Student Debt over Time
#Process the data
sc_time <- read_csv('2010_2019_student_debt.csv')
Error: '2010_2019_student_debt.csv' does not exist in current working directory ('/Users/ConnieXu/Dropbox (Business)/Spring 2021/QMSS 5063 - Data Visualization /Group_G_HigherEd/ideas_drafts').
sum(sc_time$UGDS,na.rm=TRUE)
[1] 90801007
Below is an interactive line graph (2010-2019) which details the trends in student debt over the years.
# CPI Inflation Rates - Got Average Yearly Inflation Rate for Scaling for Student Debt
install.packages('quantmod')
Error in install.packages : Updating loaded packages
library(quantmod)
getSymbols("CPIAUCSL", src='FRED')
[1] "CPIAUCSL"
avg.cpi <- apply.yearly(CPIAUCSL, mean)
cf <- as.data.frame(avg.cpi/as.numeric(avg.cpi['2009']))
cf$Year_Ending <- format(as.Date(row.names(cf), format="%Y-%m-%d"),"%Y")
# Merged for Inflation
sc_time_df <- sc_time %>% group_by(`Year_Ending`) %>% mutate(`Average Annual Student Debt - National` = sum(DEBT_MDN_STUDENT,na.rm=TRUE)/sum(UGDS,na.rm=TRUE)) %>% ungroup() %>%
dplyr::mutate(uni_rank = case_when(
ADM_RATE < 0.2 ~ 'elite/highly selective',
ADM_RATE < 0.3 ~ 'more selective',
ADM_RATE < 0.5 ~ 'selective',
ADM_RATE < 0.7 ~ 'less selective',
TRUE ~ 'not selective')) %>%
mutate(uni_rank = factor(uni_rank, levels=c('not selective', 'less selective', 'selective',
'more selective', 'elite/highly selective'))) %>%
group_by(uni_rank,Year_Ending) %>%
mutate(`Average Annual Student Debt (by Selectivity)` = sum(DEBT_MDN_STUDENT,na.rm=TRUE)/sum(UGDS,na.rm=TRUE)) %>%
ungroup() %>%
group_by(`Year_Ending`,`Average Annual Student Debt (by Selectivity)`,
uni_rank,`Average Annual Student Debt - National`) %>% summarize() %>%
merge(cf) %>%
mutate(`Adjusted Average Annual Student Debt` = `Average Annual Student Debt (by Selectivity)`/
CPIAUCSL) %>%
mutate(`Adjusted Average Annual Student Debt - Composite` = `Average Annual Student Debt - National`/
CPIAUCSL)
`summarise()` regrouping output by 'Year_Ending', 'Average Annual Student Debt (by Selectivity)', 'uni_rank' (override with `.groups` argument)
sc_df <- sc_time_df %>% group_by(`Average Annual Student Debt - National`,`Adjusted Average Annual Student Debt - Composite`,Year_Ending) %>% summarize() %>% mutate(uni_rank='national average') %>% mutate(`Adjusted Average Annual Student Debt`=`Adjusted Average Annual Student Debt - Composite`) %>% dplyr::mutate(`Average Annual Student Debt (by Selectivity)` = `Average Annual Student Debt - National`) %>% merge(cf) %>% select(Year_Ending,`Average Annual Student Debt (by Selectivity)`, uni_rank, `Average Annual Student Debt - National`, CPIAUCSL, `Adjusted Average Annual Student Debt`,`Adjusted Average Annual Student Debt - Composite`)
`summarise()` regrouping output by 'Average Annual Student Debt - National', 'Adjusted Average Annual Student Debt - Composite' (override with `.groups` argument)
sc_time_df <- sc_time_df %>% rbind(sc_df) %>% mutate(uni_rank = factor(uni_rank, levels=c('national average','not selective', 'less selective', 'selective', 'more selective', 'elite/highly selective'))) %>%
mutate(national = ifelse(uni_rank == 'national average', 'y','n'))
sc_df
sc_time_df
brewer.pal(n=10,"PuBuGn")
n too large, allowed maximum for palette PuBuGn is 9
Returning the palette you asked for with that many colors
[1] "#FFF7FB" "#ECE2F0" "#D0D1E6" "#A6BDDB" "#67A9CF" "#3690C0" "#02818A" "#016C59" "#014636"
ShortPuBuGn <- c("#D0D1E6","#A6BDDB","#67A9CF","#3690C0","#02818A")
p <- sc_time_df %>%
ggplot(.,aes(x=Year_Ending,y=`Adjusted Average Annual Student Debt`, color=uni_rank, group=national)) +
geom_point() + geom_line(aes(linetype=national)) +
scale_color_manual(values=c('grey',"#D0D1E6","#A6BDDB","#67A9CF","#3690C0","#02818A"))+
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background= element_rect(fill="white")) +
scale_x_continuous(breaks = round(seq(min(sc_time$Year_Ending), max(sc_time$Year_Ending), by = 2),1)) +
labs(x='', y='Median Loan Amount per Student\n(thousands)**',
title='Student Debt Has Been Rising Over The Years',
color='',fill='', caption='**-inflation adjusted')
ggplotly(p)
NA
NA
The following is (instead) a bar graph with fewer ‘university selectivity’ buckets.
Choropleth showing average student debt (this is only 2019, but I have a filter at the beginning that will allow me to incorporate a slider for the year).
leaflet(states_2019) %>% addProviderTiles("CartoDB.Positron") %>%
addPolygons(fillColor = ~pal(states_2019$`Average Student Loans`),
color = "white",
weight = 0.5,
fillOpacity = 0.7,
highlight = highlightOptions(
weight = 5,
color = "#666",
fillOpacity = 0.7,
bringToFront = TRUE,
),popup=pop_pop) %>%
leaflet::addLegend(position = "bottomleft", pal = pal, values = c(paste('$',round(min(states_2019$`Average Student Loans`))),
paste('$',round(max(states_2019$`Average Student Loans`)))),
title = "Average Student Loans (Per Student)") %>%
leaflet::setView(-98.5795, 39.8282, zoom=3)
Error in paste("$", round(min(states_2019$`Average Student Loans`))) :
object 'states_2019' not found
brewer.pal(n = 8, name = "RdYlGn")
leaflet(sc_time_2019_selective) %>% addTiles('http://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}.png') %>%
addCircles(col = ~pal1(sc_time_2019_selective$uni_rank),
radius = ~DEBT_MDN,
popup = content,
fillOpacity = 0.7) %>%
leaflet::addLegend(position = "bottomleft",pal = pal1, values = sc_time_2019_selective$uni_rank,
title = "Average Student Loans (Per Student)")
Assuming "long" and "lat" are longitude and latitude, respectively
Data contains 931 rows with either missing or invalid lat/lon values and will be ignored
setwd('~/Dropbox (Business)/Spring 2021/QMSS 5063 - Data Visualization /Group_G_HigherEd/')
The working directory was changed to /Users/ConnieXu/Dropbox (Business)/Spring 2021/QMSS 5063 - Data Visualization /Group_G_HigherEd inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
sc_time <- read_csv('src/2010_2019_student_debt.csv')
Missing column names filled in: 'X1' [1]
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
INSTNM = col_character(),
CITY = col_character(),
STABBR = col_character(),
ZIP = col_character(),
UNEMP_RATE = col_logical(),
DEBT_MDN = col_character(),
MN_EARN_WNE_P10 = col_logical(),
MD_EARN_WNE_P10 = col_logical(),
School = col_logical(),
State = col_logical(),
True = col_character(),
ADM_RATE_ALL_1 = col_character()
)
ℹ Use `spec()` for the full column specifications.
28660 parsing failures.
row col expected actual file
3223 UNITID a double Community College 'src/2010_2019_student_debt.csv'
3223 School 1/0/T/F/TRUE/FALSE Kenai Peninsula College 'src/2010_2019_student_debt.csv'
3223 State 1/0/T/F/TRUE/FALSE Alaska 'src/2010_2019_student_debt.csv'
3224 UNITID a double Community College 'src/2010_2019_student_debt.csv'
3224 School 1/0/T/F/TRUE/FALSE Kodiak College 'src/2010_2019_student_debt.csv'
.... ...... .................. ....................... ................................
See problems(...) for more details.
sc_time
The code chunk below has not been touched and includes Shiny code for later use.
setwd('~/Dropbox (Business)/Spring 2021/QMSS 5063 - Data Visualization /Group_G_HigherEd/')
The working directory was changed to /Users/ConnieXu/Dropbox (Business)/Spring 2021/QMSS 5063 - Data Visualization /Group_G_HigherEd inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
sc_time <- read_csv('src/2010_2019_student_debt.csv')
Missing column names filled in: 'X1' [1]
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
INSTNM = col_character(),
CITY = col_character(),
STABBR = col_character(),
ZIP = col_character(),
UNEMP_RATE = col_logical(),
DEBT_MDN = col_character(),
MN_EARN_WNE_P10 = col_logical(),
MD_EARN_WNE_P10 = col_logical(),
School = col_logical(),
State = col_logical(),
True = col_character(),
ADM_RATE_ALL_1 = col_character()
)
ℹ Use `spec()` for the full column specifications.
28660 parsing failures.
row col expected actual file
3223 UNITID a double Community College 'src/2010_2019_student_debt.csv'
3223 School 1/0/T/F/TRUE/FALSE Kenai Peninsula College 'src/2010_2019_student_debt.csv'
3223 State 1/0/T/F/TRUE/FALSE Alaska 'src/2010_2019_student_debt.csv'
3224 UNITID a double Community College 'src/2010_2019_student_debt.csv'
3224 School 1/0/T/F/TRUE/FALSE Kodiak College 'src/2010_2019_student_debt.csv'
.... ...... .................. ....................... ................................
See problems(...) for more details.
library(reshape2)
library(shiny)
ui <- fluidPage(
titlePanel("I love Graphs about Student Debt"),
# CODE BELOW: Add select input named "sex" to choose between "M" and "F"
selectInput('year',
'Year',
choices=c(2010,2011,2012,2013,2014,2015,2016,2017,2018,2019)),
# Add plot output to display top 10 most popular names
leafletOutput("studentdebtmap")
)
server <- function(input, output, session){
# Render plot of top 10 most popular names
sc_time_selective <- reactive({
sc_time %>%
dplyr::filter(Year_Ending == input$year) %>%
dplyr::filter(DEBT_MDN !='PrivacySuppressed') %>%
transform(DEBT_MDN = as.numeric(DEBT_MDN)) %>%
dplyr::mutate(DEBT_MDN = ifelse(is.na(DEBT_MDN), 0, DEBT_MDN)) %>%
mutate(DEBT_MDN_STUDENT = DEBT_MDN*UGDS) %>%
group_by(STABBR) %>%
mutate(`Average Student Loans`=sum(DEBT_MDN_STUDENT,na.rm=TRUE)/sum(UGDS,na.rm=TRUE)) %>%
dplyr::rename(lat = LATITUDE) %>%
dplyr::rename(long = LONGITUDE) %>%
dplyr::rename(state = STABBR) %>%
filter(ADM_RATE < 0.3) %>%
dplyr::mutate(uni_rank = case_when(
ADM_RATE < 0.05 ~ 'elite',
ADM_RATE < 0.2 ~ 'highly selective',
TRUE ~ 'selective'))
})
output$studentdebtmap <- renderLeaflet({
content <- paste(
"School",sc_time_selective$INSTNM,"<br/>",
"Number of Undergrads:",sc_time_selective$UGDS,"<br/>",
"Selectivity:", sc_time_selective$uni_rank,"<br/>",
"Median Debt:",paste('$',round(sc_time_selective$DEBT_MDN,2)),"<br/>",
"Year",sc_time_selective$`Year_Ending`)
leaflet (sc_time) %>% addTiles('http://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}.png') %>%
addCircles(col = ~pal1(sc_time$uni_rank),
radius = ~DEBT_MDN,
popup = content,
fillOpacity = 0.7) %>%
leaflet::addLegend(position = "bottomleft",pal = pal1, values = sc_time$uni_rank,
title = "Average Student Loans (Per Student)")
})
observe({
leafletProxy("studentdebtmap", data = sc_time_selective())
})
}
shinyApp(ui = ui, server = server)
Listening on http://127.0.0.1:5747
Error in $: object of type 'closure' is not subsettable [No stack trace available]
NA
ui <- fluidPage(
titlePanel('The Cost of Higher Education: An Exploration of Student Debt in American Universities')
)
---
title: "Visuals_Draft_04_14"
author: "Connie Xu"
date: "4/14/2021"
output: html_notebook
---

```{r setup, include=FALSE}
# Switch to the visuals folder so the relative path in source() below resolves.
# NOTE(review): in a notebook the working directory is reset when the chunk
# finishes, so this setwd() only affects the statements in this chunk.
setwd("~/Dropbox (Business)/Spring 2021/QMSS 5063 - Data Visualization /Group_G_HigherEd/src/visuals/")
# Echo the code of every chunk in the rendered output by default.
knitr::opts_chunk$set(echo = TRUE)
# Run the project's theme script (presumably shared ggplot theme definitions;
# its contents are not visible from this file).
source("ourtheme.R")
```

```{r packages, echo=FALSE, eval=TRUE, warning=FALSE, message=FALSE}
# Pin the CRAN mirror so install.packages() never prompts for one while knitting.
r <- getOption("repos")
r["CRAN"] <- "http://cran.us.r-project.org"
options(repos = r)

# Packages (basic)
suppressMessages(library(dplyr))
suppressMessages(library(tidyverse))

# Packages (reading)
suppressMessages(library(XML))
suppressMessages(library(RCurl))
suppressMessages(library(readr))
suppressMessages(library("readxl"))

# Packages (themes)
suppressMessages(library(ggthemes))
suppressMessages(library(ggrepel))
suppressMessages(library(RColorBrewer))
suppressMessages(library(viridis))
suppressMessages(library(hrbrthemes))
suppressMessages(library(plotly))

# Packages (maps)
suppressMessages(library(RgoogleMaps))
suppressMessages(library(ggmap))

# Install the map dependencies only when they are missing. Installing
# unconditionally re-downloads them on every knit and can fail when the
# package is already loaded in the session.
local({
  for (pkg in c("maps", "tmap", "rgeos")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      suppressMessages(install.packages(pkg))
    }
  }
})
suppressMessages(library(tmap))

suppressMessages(library(devtools))
# The development version of leaflet is fetched from GitHub only when no
# leaflet installation is available at all.
if (!requireNamespace("leaflet", quietly = TRUE)) {
  devtools::install_github("rstudio/leaflet")
}
```

```{r import general data, echo=TRUE, eval=TRUE, message=FALSE, warning=FALSE}
# Work from the project root (Group_G_HigherEd) so the data path below resolves.
setwd("~/Dropbox (Business)/Spring 2021/QMSS 5063 - Data Visualization /Group_G_HigherEd")

# Our College Scorecard extract initially covered 2019 only; a small Python
# script later concatenated 2010-2019 and kept a handful of columns relevant to
# the visualization and analysis. The first visualizations only include 2019,
# so the concatenated file is filtered back down to that single year here.
sc_time <- read_csv('src/2010_2019_student_debt.csv')
sc <- filter(sc_time, Year_Ending == 2019)

library(educationdata)
# Test run using get_education_data
# data <- get_education_data(level = "college-university",
#     source = "ipeds",
#     topic = "directory",
#     filters = list(year = 2019))
# data
# Scorecard data - 2019

## Bucket every school into a selectivity tier based on its admission rate.
## Rows whose ADM_RATE is NA match none of the comparisons and therefore land
## in the catch-all 'not selective' tier.
sc <- mutate(
  sc,
  uni_rank = factor(
    case_when(
      ADM_RATE < 0.2 ~ 'highly selective/elite',
      ADM_RATE < 0.3 ~ 'more selective',
      ADM_RATE < 0.5 ~ 'selective',
      ADM_RATE < 0.7 ~ 'less selective',
      TRUE ~ 'not selective'
    ),
    levels = c('not selective', 'less selective', 'selective',
               'more selective', 'highly selective/elite')
  )
)
```

## Simple Scattergram 

First we are going to try to present this pattern for different tiers of universities (admission rate as well as debt)
```{r, echo=TRUE, eval=TRUE}
# Missing median-debt entries are recorded as 0; 'PrivacySuppressed' records
# are dropped and the column converted to numeric inside the plotting pipeline
# (the same cleaning could also be done once on the main sc data frame).
sc$DEBT_MDN[is.na(sc$DEBT_MDN)] <- 0

# Print the full PuBuGn palette for reference; 9 is the most it provides.
brewer.pal(n = 9, "PuBuGn")
# The five mid-to-dark PuBuGn shades, one per selectivity tier.
ShortPuBuGn <- c("#D0D1E6", "#A6BDDB", "#67A9CF", "#3690C0", "#02818A")

# Admission rate vs. median debt, coloured by selectivity tier, with a smoother
# over all schools.
m <- sc %>%
  subset(DEBT_MDN != 'PrivacySuppressed') %>%
  transform(DEBT_MDN = as.numeric(DEBT_MDN)) %>%
  ggplot(., aes(x = ADM_RATE, y = DEBT_MDN, color = uni_rank)) +
  geom_point(pch = 21) +
  geom_smooth(color = 'navy', se = FALSE) +
  scale_color_manual(values = ShortPuBuGn) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white")) +
  # DEBT_MDN is numeric, so it needs a continuous scale; the ticks are placed
  # at 0/10k/20k/30k dollars and labelled in thousands to match the axis title.
  scale_y_continuous(breaks = c(0, 10000, 20000, 30000),
                     labels = c('0', '10', '20', '30')) +
  labs(x = 'Admissions Rate', y = 'Median Loan Amount per Student\n(thousands)',
       title = 'Student Debt and Admissions Rate',
       color = 'Selectivity')
m
```

This is a data table showing the breakdown of the university ranks (as I chose to rank them). I will note that, from the data table alone, we are seeing a downward trend in the 'Median Student Loans' column. 
```{r, echo=TRUE, eval=TRUE}
# Create a summarized data table with one row per selectivity tier.
library('scales')

sc_dt <- sc %>%
  subset(DEBT_MDN != 'PrivacySuppressed') %>%
  transform(DEBT_MDN = as.numeric(DEBT_MDN)) %>%
  # Weight each school's median debt by its undergraduate enrollment.
  mutate(DEBT_MDN_STUDENTS = DEBT_MDN * UGDS) %>%
  group_by(uni_rank) %>%
  summarize(
    # Enrollment-weighted average of the schools' median debt, in dollars.
    `Median Student Loans` = paste('$', round(sum(DEBT_MDN_STUDENTS, na.rm = TRUE) /
                                                sum(UGDS, na.rm = TRUE), 2)),
    `Number of Universities` = n(),
    # na.rm = TRUE: the 'not selective' tier also contains schools with no
    # reported admission rate, which would otherwise turn both bounds into NA.
    `Min Acceptance Rate` = percent(min(ADM_RATE, na.rm = TRUE)),
    `Max Acceptance Rate` = percent(max(ADM_RATE, na.rm = TRUE))
  )

# Install DT only when it is missing instead of on every run.
if (!requireNamespace('DT', quietly = TRUE)) {
  install.packages('DT')
}
library(DT)
table <- datatable(sc_dt, style = "default", filter = 'top', caption = 'Universities and Selectivity')
table

```

## Scattergram as Violin Plot
```{r, echo=TRUE, eval=TRUE}
# Distribution of median debt within each selectivity tier: a violin per tier
# with a narrow box plot drawn on top.
m <- sc %>%
  subset(DEBT_MDN != 'PrivacySuppressed') %>%
  transform(DEBT_MDN = as.numeric(DEBT_MDN)) %>%
  ggplot(., aes(x = uni_rank, y = DEBT_MDN)) +
  geom_violin(aes(fill = uni_rank, color = uni_rank)) +
  geom_boxplot(width = 0.2) +
  scale_fill_manual(values = ShortPuBuGn) +
  scale_colour_manual(values = ShortPuBuGn) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white")) +
  # DEBT_MDN is numeric, so use a continuous scale with ticks labelled in
  # thousands of dollars.
  scale_y_continuous(breaks = c(0, 10000, 20000, 30000),
                     labels = c('0', '10', '20', '30')) +
  # One label per uni_rank level, in level order. uni_rank has five levels
  # (the top tier is the combined 'highly selective/elite', i.e. below 20%).
  scale_x_discrete(labels = c('not\nselective\n(>70%)', 'less\nselective\n(50%-70%)',
                              'selective\n(30%-50%)', 'more\nselective\n(20%-30%)',
                              'highly selective/\nelite\n(<20%)')) +
  labs(x = 'Selectivity\n(admission rate thresholds)', y = 'Median Loan Amount per Student\n(thousands)',
       title = 'Selective Schools and Student Debt',
       color = '', fill = '')
m
```
The chunk above shows the previous scattergram as violin plots.

```{r, echo=TRUE, eval=TRUE}
# Request the 2015 IPEDS "grad-rates-pell" table for colleges and universities
# through the educationdata package, then print what came back.
ipeds15 <- get_education_data(
  level = "college-university",
  source = "ipeds",
  topic = "grad-rates-pell",
  filters = list(year = 2015)
)
ipeds15
```


## Student Debt over Time

```{r, echo=TRUE, eval=TRUE}

# Process the data
# Read the concatenated 2010-2019 extract through the project path used by the
# other chunks. A bare file name only resolves when the working directory
# happens to be the src folder, which is not the case when the notebook runs.
sc_time <- read_csv('~/Dropbox (Business)/Spring 2021/QMSS 5063 - Data Visualization /Group_G_HigherEd/src/2010_2019_student_debt.csv')

# Drop privacy-suppressed debt records, convert the debt column to numeric and
# record any value that still fails to parse as 0.
sc_time <- sc_time %>%
  subset(DEBT_MDN != 'PrivacySuppressed') %>%
  transform(DEBT_MDN = as.numeric(DEBT_MDN)) %>%
  dplyr::mutate(DEBT_MDN = ifelse(is.na(DEBT_MDN), 0, DEBT_MDN))

```


```{r, echo=TRUE, eval=TRUE}
# Load the 2009-2019 extract and prepare the debt columns.
# NOTE(review): the file name is relative to the current working directory;
# confirm where this file lives.
sc_time <- read_csv('2009_2019_student_debt.csv')

# 1. drop privacy-suppressed debt records
# 2. convert the debt column to numeric
# 3. record unparseable debt values as 0, then weight each school's median debt
#    by its undergraduate enrollment
sc_time <- sc_time %>%
  subset(DEBT_MDN != 'PrivacySuppressed') %>%
  transform(DEBT_MDN = as.numeric(DEBT_MDN)) %>%
  dplyr::mutate(
    DEBT_MDN = ifelse(is.na(DEBT_MDN), 0, DEBT_MDN),
    DEBT_MDN_STUDENT = DEBT_MDN * UGDS
  )
sc_time

# Total undergraduate enrollment summed over every school-year row.
sum(sc_time$UGDS, na.rm = TRUE)
```

Below is an interactive line graph (2010-2019) which details the trends in student debt over the years. 
```{r, echo=TRUE, eval=TRUE}
# CPI Inflation Rates - average yearly CPI used to scale student debt.
# Install quantmod only when it is missing: reinstalling a package that is
# already loaded fails mid-session and forces a download on every run.
if (!requireNamespace('quantmod', quietly = TRUE)) {
  install.packages('quantmod')
}
library(quantmod)
# Download the CPIAUCSL series from FRED; getSymbols() creates the CPIAUCSL
# object used on the next line.
getSymbols("CPIAUCSL", src = 'FRED')
# Yearly mean CPI divided by the 2009 value, i.e. a deflator with 2009 == 1.
avg.cpi <- apply.yearly(CPIAUCSL, mean)
cf <- as.data.frame(avg.cpi / as.numeric(avg.cpi['2009']))
# Join key: the four-digit year taken from each row's date label.
cf$Year_Ending <- format(as.Date(row.names(cf), format = "%Y-%m-%d"), "%Y")

# Merged for Inflation: one row per (year, selectivity tier) holding the
# enrollment-weighted debt for the tier and for the nation, both raw and
# divided by the CPI factor.
sc_time_df <- sc_time %>%
  group_by(`Year_Ending`) %>%
  mutate(`Average Annual Student Debt - National` = sum(DEBT_MDN_STUDENT, na.rm = TRUE) / sum(UGDS, na.rm = TRUE)) %>%
  ungroup() %>%
  dplyr::mutate(uni_rank = case_when(
    ADM_RATE < 0.2 ~ 'elite/highly selective',
    ADM_RATE < 0.3 ~ 'more selective',
    ADM_RATE < 0.5 ~ 'selective',
    ADM_RATE < 0.7 ~ 'less selective',
    TRUE ~ 'not selective')) %>%
  mutate(uni_rank = factor(uni_rank, levels = c('not selective', 'less selective', 'selective',
                                                'more selective', 'elite/highly selective'))) %>%
  group_by(uni_rank, Year_Ending) %>%
  mutate(`Average Annual Student Debt (by Selectivity)` = sum(DEBT_MDN_STUDENT, na.rm = TRUE) / sum(UGDS, na.rm = TRUE)) %>%
  ungroup() %>%
  # Grouping by every column of interest and calling summarize() with no
  # arguments keeps one row per distinct combination.
  group_by(`Year_Ending`, `Average Annual Student Debt (by Selectivity)`,
           uni_rank, `Average Annual Student Debt - National`) %>%
  summarize() %>%
  merge(cf) %>%
  mutate(`Adjusted Average Annual Student Debt` = `Average Annual Student Debt (by Selectivity)` /
           CPIAUCSL) %>%
  mutate(`Adjusted Average Annual Student Debt - Composite` = `Average Annual Student Debt - National` /
           CPIAUCSL)

# National series reshaped to the same columns as the per-tier rows so that it
# can be appended as an extra 'national average' tier.
sc_df <- sc_time_df %>%
  group_by(`Average Annual Student Debt - National`, `Adjusted Average Annual Student Debt - Composite`, Year_Ending) %>%
  summarize() %>%
  mutate(uni_rank = 'national average') %>%
  mutate(`Adjusted Average Annual Student Debt` = `Adjusted Average Annual Student Debt - Composite`) %>%
  dplyr::mutate(`Average Annual Student Debt (by Selectivity)` = `Average Annual Student Debt - National`) %>%
  merge(cf) %>%
  select(Year_Ending, `Average Annual Student Debt (by Selectivity)`, uni_rank,
         `Average Annual Student Debt - National`, CPIAUCSL,
         `Adjusted Average Annual Student Debt`, `Adjusted Average Annual Student Debt - Composite`)
sc_time_df <- sc_time_df %>%
  rbind(sc_df) %>%
  mutate(uni_rank = factor(uni_rank, levels = c('national average', 'not selective', 'less selective', 'selective', 'more selective', 'elite/highly selective'))) %>%
  # Flag the national series so it can be drawn with its own line type.
  mutate(national = ifelse(uni_rank == 'national average', 'y', 'n'))
sc_df
sc_time_df


p <- sc_time_df %>%
  # group = uni_rank gives every tier its own line. Grouping by `national`
  # would put all five tiers into a single path that jumps between tiers at
  # every year.
  ggplot(., aes(x = Year_Ending, y = `Adjusted Average Annual Student Debt`, color = uni_rank, group = uni_rank)) +
  geom_point() + geom_line(aes(linetype = national)) +
  scale_color_manual(values = c('grey', "#D0D1E6", "#A6BDDB", "#67A9CF", "#3690C0", "#02818A")) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white")) +
  scale_x_continuous(breaks = round(seq(min(sc_time$Year_Ending), max(sc_time$Year_Ending), by = 2), 1)) +
  # The data are in dollars; show the ticks in thousands to match the label.
  scale_y_continuous(labels = function(dollars) dollars / 1000) +
  labs(x = '', y = 'Inflation-Adjusted Median Loan Amount per Student\n(thousands)',
       title = 'Student Debt Has Been Rising Over The Years',
       color = '', fill = '', group = '')
ggplotly(p)


```

The following is (instead) a bar graph with fewer 'university selectivity' buckets. 
```{r}
# Same aggregation as the line chart, but with three broader selectivity
# buckets. Relies on the CPI factor table `cf` built in an earlier chunk.
sc_time_df <- sc_time %>%
  # enrollment-weighted national debt per year
  group_by(`Year_Ending`) %>%
  mutate(`Average Annual Student Debt - National` = sum(DEBT_MDN_STUDENT, na.rm = TRUE) / sum(UGDS, na.rm = TRUE)) %>%
  ungroup() %>%
  # three-bucket selectivity tier, ordered from least to most selective
  dplyr::mutate(
    uni_rank = factor(
      case_when(
        ADM_RATE < 0.2 ~ 'highly selective',
        ADM_RATE < 0.5 ~ 'moderately selective',
        TRUE ~ 'less/not at all selective'
      ),
      levels = c('less/not at all selective', 'moderately selective', 'highly selective')
    )
  ) %>%
  # enrollment-weighted debt per tier and year
  group_by(uni_rank, Year_Ending) %>%
  mutate(`Average Annual Student Debt (by Selectivity)` = sum(DEBT_MDN_STUDENT, na.rm = TRUE) / sum(UGDS, na.rm = TRUE)) %>%
  # collapse to one row per distinct (year, tier) combination
  group_by(`Year_Ending`, `Average Annual Student Debt (by Selectivity)`,
           uni_rank, `Average Annual Student Debt - National`) %>%
  summarize() %>%
  merge(cf) %>%
  # divide both series by the CPI factor
  mutate(
    `Adjusted Average Annual Student Debt` = `Average Annual Student Debt (by Selectivity)` / CPIAUCSL,
    `Adjusted Average Annual Student Debt - Composite` = `Average Annual Student Debt - National` / CPIAUCSL
  )

# National series, reshaped to the per-tier column layout so it can be appended.
sc_df <- sc_time_df %>%
  group_by(`Average Annual Student Debt - National`, `Adjusted Average Annual Student Debt - Composite`, Year_Ending) %>%
  summarize() %>%
  mutate(
    uni_rank = 'national average',
    `Adjusted Average Annual Student Debt` = `Adjusted Average Annual Student Debt - Composite`,
    `Average Annual Student Debt (by Selectivity)` = `Average Annual Student Debt - National`
  ) %>%
  merge(cf) %>%
  select(Year_Ending, `Average Annual Student Debt (by Selectivity)`, uni_rank,
         `Average Annual Student Debt - National`, CPIAUCSL,
         `Adjusted Average Annual Student Debt`, `Adjusted Average Annual Student Debt - Composite`)

sc_time_df <- sc_time_df %>%
  rbind(sc_df) %>%
  mutate(uni_rank = factor(uni_rank, levels = c('national average', 'less/not at all selective', 'moderately selective', 'highly selective')))

# Grouped bar chart: one bar per tier (plus the national average) for each year.
fig1 <- sc_time_df %>%
  plot_ly(
    x = ~Year_Ending,
    y = ~`Adjusted Average Annual Student Debt`,
    type = 'bar',
    color = ~uni_rank,
    alpha = 0.8,
    hovertemplate = 'Average Debt/Student (USD): %{y} <extra></extra>',
    colors = 'Purples'
  ) %>%
  layout(
    yaxis = list(title = "Average Annual Student Debt per Student\n(Adjusted for Inflation)")
  )

fig1

```

Choropleth showing average student debt (this is only 2019, but I have a filter at the beginning that will allow me to incorporate a slider for the year).

```{r Student Debt map - Chloropleth, echo=TRUE, eval=TRUE }
# State boundaries from the tigris package (cartographic boundary file).
library(tigris)
states <- states(cb = TRUE)

# Can change to sc_time_year.
# Keep only the 2019 rows. This must be a comparison (`==`) inside filter();
# writing subset(Year_Ending = 2019) passes an unused named argument and
# silently keeps every year.
sc_time_2019 <- sc_time %>%
  dplyr::filter(Year_Ending == 2019) %>%
  group_by(STABBR) %>%
  # Enrollment-weighted average debt of the schools located in each state.
  mutate(`Average Student Loans` = sum(DEBT_MDN_STUDENT, na.rm = TRUE) / sum(UGDS, na.rm = TRUE))
# One row per state.
sc_time_2019_state <- sc_time_2019 %>% group_by(STABBR, `Average Student Loans`) %>% summarize()

# Attach the state averages to the state polygons.
states_2019 <- states %>%
  inner_join(sc_time_2019_state, by = c(STUSPS = 'STABBR'))


library(leaflet.providers)
library(leaflet)

# The state average is a continuous quantity, so use a numeric colour ramp
# rather than a factor palette.
pal <- colorNumeric(ShortPuBuGn, domain = states_2019$`Average Student Loans`)
states_2019
pop_pop <- paste("State:", states_2019$NAME, "<br/>",
                 "Average Student Loans", "<br/>",
                 "of Schools Located in State:", paste('$', round(states_2019$`Average Student Loans`)))

leaflet(states_2019) %>% addProviderTiles("CartoDB.Positron") %>%
  addPolygons(fillColor = ~pal(`Average Student Loans`),
              color = "white",
              weight = 0.5,
              fillOpacity = 0.7,
              highlightOptions = highlightOptions(
                weight = 5,
                color = "#666",
                fillOpacity = 0.7,
                bringToFront = TRUE),
              popup = pop_pop) %>%
  # The legend receives the numeric values themselves so they fall inside the
  # palette's domain; the dollar sign is added by the label formatter.
  leaflet::addLegend(position = "bottomleft", pal = pal,
                     values = ~`Average Student Loans`,
                     labFormat = labelFormat(prefix = "$"),
                     title = "Average Student Loans (Per Student)") %>%
  leaflet::setView(-98.5795, 39.8282, zoom = 3)
```

```{r}
# Print the eight-colour RdYlGn palette for reference.
brewer.pal(name = "RdYlGn", n = 8)
```

```{r Student Debt Map - points, echo=TRUE, eval=TRUE }
# sc_time_2019 is created in the choropleth chunk above. The year filter is
# applied here as well so this map shows 2019 schools regardless of how that
# object was built.
sc_time_2019_selective <- sc_time_2019 %>%
  dplyr::filter(Year_Ending == 2019) %>%
  dplyr::rename(lat = LATITUDE) %>%
  dplyr::rename(long = LONGITUDE) %>%
  dplyr::rename(state = STABBR) %>%
  # Only schools admitting fewer than 30% of applicants are mapped.
  filter(ADM_RATE < 0.3) %>%
  dplyr::mutate(uni_rank = case_when(
    ADM_RATE < 0.05 ~ 'elite',
    ADM_RATE < 0.2 ~ 'highly selective',
    TRUE ~ 'selective'))
# Drop privacy-suppressed debt records, convert the debt column to numeric and
# record any value that still fails to parse as 0.
sc_time_2019_selective <- sc_time_2019_selective %>%
  subset(DEBT_MDN != 'PrivacySuppressed') %>%
  transform(DEBT_MDN = as.numeric(DEBT_MDN)) %>%
  dplyr::mutate(DEBT_MDN = ifelse(is.na(DEBT_MDN), 0, DEBT_MDN))

# One colour per selectivity tier.
pal1 <- colorFactor(ShortPuBuGn, domain = sc_time_2019_selective$`uni_rank`, reverse = TRUE)

# set popups
content <- paste("School", sc_time_2019_selective$INSTNM, "<br/>",
                 "Number of Undergrads:", sc_time_2019_selective$UGDS, "<br/>",
                 "Selectivity:", sc_time_2019_selective$uni_rank, "<br/>",
                 "Median Debt:", paste('$', round(sc_time_2019_selective$DEBT_MDN, 2)), "<br/>")
sc_time_2019_selective
leaflet(sc_time_2019_selective) %>% addTiles('http://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}.png') %>%
  # Circle colour encodes the selectivity tier; the radius is the school's
  # median debt.
  addCircles(color = ~pal1(uni_rank),
             radius = ~DEBT_MDN,
             popup = content,
             fillOpacity = 0.7) %>%
  # The legend explains the circle colours, which encode selectivity.
  leaflet::addLegend(position = "bottomleft", pal = pal1, values = ~uni_rank,
                     title = "Selectivity")
```
```{r}
# Move to the project root so the relative data path below resolves.
setwd('~/Dropbox (Business)/Spring 2021/QMSS 5063 - Data Visualization /Group_G_HigherEd/')
# Re-read the raw 2010-2019 extract. NOTE(review): this overwrites any cleaned
# sc_time built by earlier chunks with the unprocessed file contents.
sc_time <- read_csv('src/2010_2019_student_debt.csv') 
# Print the freshly loaded data for inspection.
sc_time
```

The code chunk below has not been touched and includes Shiny code for later use. 
```{r}
# Load the raw 2010-2019 extract from the project root.
setwd('~/Dropbox (Business)/Spring 2021/QMSS 5063 - Data Visualization /Group_G_HigherEd/')
sc_time <- read_csv('src/2010_2019_student_debt.csv')

library(reshape2)
library(shiny)

# UI: a year selector above a leaflet map.
ui <- fluidPage(
  titlePanel("I love Graphs about Student Debt"),
  # Year whose schools are shown on the map
  selectInput('year',
              'Year',
              choices = c(2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019)),
  # Map of selective schools for the chosen year
  leafletOutput("studentdebtmap")
)

# Server: filters and cleans the data for the selected year and renders the map.
# Uses the `pal1` colour palette defined in the points-map chunk.
server <- function(input, output, session) {
  # Cleaned data for the selected year, limited to schools admitting fewer
  # than 30% of applicants. Re-evaluated whenever input$year changes.
  sc_time_selective <- reactive({
    sc_time %>%
      # selectInput returns the choice as text, so convert it before comparing.
      dplyr::filter(Year_Ending == as.numeric(input$year)) %>%
      dplyr::filter(DEBT_MDN != 'PrivacySuppressed') %>%
      dplyr::mutate(DEBT_MDN = as.numeric(DEBT_MDN)) %>%
      dplyr::mutate(DEBT_MDN = ifelse(is.na(DEBT_MDN), 0, DEBT_MDN)) %>%
      mutate(DEBT_MDN_STUDENT = DEBT_MDN * UGDS) %>%
      group_by(STABBR) %>%
      mutate(`Average Student Loans` = sum(DEBT_MDN_STUDENT, na.rm = TRUE) / sum(UGDS, na.rm = TRUE)) %>%
      ungroup() %>%
      dplyr::rename(lat = LATITUDE) %>%
      dplyr::rename(long = LONGITUDE) %>%
      dplyr::rename(state = STABBR) %>%
      filter(ADM_RATE < 0.3) %>%
      dplyr::mutate(uni_rank = case_when(
        ADM_RATE < 0.05 ~ 'elite',
        ADM_RATE < 0.2 ~ 'highly selective',
        TRUE ~ 'selective'))
  })

  output$studentdebtmap <- renderLeaflet({
    # A reactive is a function: call it to obtain the data frame. Subsetting
    # the reactive object itself fails with "object of type 'closure' is not
    # subsettable".
    map_data <- sc_time_selective()

    content <- paste(
      "School", map_data$INSTNM, "<br/>",
      "Number of Undergrads:", map_data$UGDS, "<br/>",
      "Selectivity:", map_data$uni_rank, "<br/>",
      "Median Debt:", paste('$', round(map_data$DEBT_MDN, 2)), "<br/>",
      "Year", map_data$`Year_Ending`)

    # Build the map from the filtered data: it carries the lat/long and
    # uni_rank columns, which the raw sc_time table does not have.
    leaflet(map_data) %>% addTiles('http://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}.png') %>%
      addCircles(color = ~pal1(uni_rank),
                 radius = ~DEBT_MDN,
                 popup = content,
                 fillOpacity = 0.7) %>%
      # The legend explains the circle colours, which encode selectivity.
      leaflet::addLegend(position = "bottomleft", pal = pal1, values = ~uni_rank,
                         title = "Selectivity")
  })
}

shinyApp(ui = ui, server = server)

```

```{r}
# Title-only page layout; replaces any `ui` object defined earlier.
ui <- fluidPage(
  titlePanel(
    title = 'The Cost of Higher Education: An Exploration of Student Debt in American Universities'
  )
)
 
```

